package exp.ranking.fe;

import java.util.HashMap;
import java.util.List;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.TokenSequence;

/**
 * A pipe that strips a fixed set of keywords out of a token sequence.
 *
 * <p>The keywords are supplied once at construction time. When an instance
 * passes through this pipe, every token whose text is exactly equal to one of
 * those keywords is removed from the instance's {@link TokenSequence} in place.
 *
 * @author Lanjun
 */
public class KWFilterPipe extends Pipe {

	private static final long serialVersionUID = -5788228584351545390L;

	/** Keyword lookup table; only the keys matter, every value is {@code true}. */
	private HashMap<String, Boolean> keyWords;

	/**
	 * Builds a filter for the given keywords.
	 *
	 * @param keyWords the token texts that this pipe should remove; duplicates
	 *                 are collapsed
	 */
	public KWFilterPipe(List<String> keyWords) {
		this.keyWords = new HashMap<>();
		for (String word : keyWords) {
			this.keyWords.put(word, Boolean.TRUE);
		}
	}

	/**
	 * Removes every keyword token from the carrier's data.
	 *
	 * <p>If the carrier's data is not a {@link TokenSequence}, an error line is
	 * written to standard error and the carrier is returned untouched.
	 *
	 * @param carrier the instance to filter
	 * @return the same {@code carrier} that was passed in
	 */
	@Override
	public Instance pipe(Instance carrier) {
		Object data = carrier.getData();
		if (!(data instanceof TokenSequence)) {
			System.err.println("ERROR: input should be TokenSequence");
			return carrier;
		}

		TokenSequence tokens = (TokenSequence) data;
		int idx = 0;
		while (idx < tokens.size()) {
			String text = tokens.get(idx).getText();
			if (keyWords.containsKey(text)) {
				// The following tokens shift left, so stay on the same index.
				tokens.remove(idx);
			} else {
				idx++;
			}
		}
		carrier.setData(tokens);
		return carrier;
	}
}
